/*-------------------------------------------------------------------------
 *
 * smgr.c
 *      public interface routines to storage manager switch.
 *
 *      All file system operations in POSTGRES dispatch through these
 *      routines.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * This source code file contains modifications made by THL A29 Limited ("Tencent Modifications").
 * All Tencent Modifications are Copyright (C) 2023 THL A29 Limited.
 *
 * IDENTIFICATION
 *      src/backend/storage/smgr/smgr.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "commands/tablespace.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/inval.h"
#ifdef _MLS_
#include "storage/relcryptstorage.h"
#endif

/*
 * This struct of function pointers defines the API between smgr.c and
 * any individual storage manager module.  Note that smgr subfunctions are
 * generally expected to report problems via elog(ERROR).  An exception is
 * that smgr_unlink should use elog(WARNING), rather than erroring out,
 * because we normally unlink relations during post-commit/abort cleanup,
 * and so it's too late to raise an error.  Also, various conditions that
 * would normally be errors should be allowed during bootstrap and/or WAL
 * recovery --- see comments in md.c for details.
 */
typedef struct f_smgr
{
    /*
     * Per-backend lifecycle hooks.  smgrinit() and smgrshutdown() skip a
     * hook that is NULL.
     */
    void        (*smgr_init) (void);    /* may be NULL */
    void        (*smgr_shutdown) (void);    /* may be NULL */

    /* Per-fork operations; smgr.c calls these without a NULL check. */
    void        (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
    void        (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
                                bool isRedo);
    bool        (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
    /* smgrdounlink() passes InvalidForkNumber here to unlink all forks */
    void        (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum,
                                bool isRedo);
    void        (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
                                BlockNumber blocknum, char *buffer, bool skipFsync);
    void        (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
                                  BlockNumber blocknum);
    void        (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
                              BlockNumber blocknum, char *buffer);
    void        (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
                               BlockNumber blocknum, char *buffer, bool skipFsync);
    void        (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
                                   BlockNumber blocknum, BlockNumber nblocks);
    BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
    void        (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
                                  BlockNumber nblocks);
    void        (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);

    /*
     * Checkpoint hooks, driven by smgrpreckpt(), smgrsync() and
     * smgrpostckpt(); each is skipped when NULL.
     */
    void        (*smgr_pre_ckpt) (void);    /* may be NULL */
    void        (*smgr_sync) (void);    /* may be NULL */
    void        (*smgr_post_ckpt) (void);    /* may be NULL */
#ifdef _SHARDING_
    /* Sharding-only hooks, reached through smgrdealloc() / smgrrealloc() */
    void        (*smgr_dealloc)(SMgrRelation reln, ForkNumber forknum, BlockNumber from_blk);
    void        (*smgr_realloc)(SMgrRelation reln, ForkNumber forknum, BlockNumber from_blk);
#endif
} f_smgr;


/*
 * Dispatch table of storage managers, indexed by an SMgrRelation's
 * smgr_which field.  The initializer is positional, so its entries must
 * stay in the same order as the members of f_smgr (including the
 * _SHARDING_-only tail).
 */
static const f_smgr smgrsw[] = {
    /* magnetic disk */
    {mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
        mdprefetch, mdread, mdwrite, mdwriteback, mdnblocks, mdtruncate,
        mdimmedsync, mdpreckpt, mdsync, mdpostckpt
#ifdef _SHARDING_
        ,mddealloc, mdrealloc
#endif
    }
};

/* Number of entries in smgrsw[]; bounds the loops that visit every manager */
static const int NSmgr = lengthof(smgrsw);


/*
 * Each backend has a hashtable that stores all extant SMgrRelation objects.
 * In addition, "unowned" SMgrRelation objects are chained together in a list.
 */
/* Created lazily by smgropen(); NULL until the first call */
static HTAB *SMgrRelationHash = NULL;

/* Head of the singly-linked list of SMgrRelations that have no owner */
static SMgrRelation first_unowned_reln = NULL;

/* local function prototypes */
static void smgrshutdown(int code, Datum arg);
static void add_to_unowned_list(SMgrRelation reln);
static void remove_from_unowned_list(SMgrRelation reln);


/*
 *    smgrinit(), smgrshutdown() -- Initialize or shut down storage
 *                                  managers.
 *
 * Note: smgrinit is called during backend startup (normal or standalone
 * case), *not* during postmaster start.  Therefore, any resources created
 * here or destroyed in smgrshutdown are backend-local.
 */
void
smgrinit(void)
{
    int            i;

    for (i = 0; i < NSmgr; i++)
    {
        if (smgrsw[i].smgr_init)
            (*(smgrsw[i].smgr_init)) ();
    }

    /* register the shutdown proc */
    on_proc_exit(smgrshutdown, 0);
}

/*
 * on_proc_exit hook for smgr cleanup during backend shutdown
 */
static void
smgrshutdown(int code, Datum arg)
{
    int            mgr;

    /* code and arg are part of the on_proc_exit signature; unused here */
    for (mgr = 0; mgr < NSmgr; mgr++)
    {
        if (smgrsw[mgr].smgr_shutdown == NULL)
            continue;

        smgrsw[mgr].smgr_shutdown();
    }
}

/*
 *    smgropen() -- Return an SMgrRelation object, creating it if need be.
 *
 *        This does not attempt to actually open the underlying file.
 */
SMgrRelation
smgropen(RelFileNode rnode, BackendId backend)
{
    RelFileNodeBackend brnode;
    SMgrRelation reln;
    bool        found;

    if (SMgrRelationHash == NULL)
    {
        /* First time through: initialize the hash table */
        HASHCTL        ctl;

        MemSet(&ctl, 0, sizeof(ctl));
        ctl.keysize = sizeof(RelFileNodeBackend);
        ctl.entrysize = sizeof(SMgrRelationData);
        SMgrRelationHash = hash_create("smgr relation table", 400,
                                       &ctl, HASH_ELEM | HASH_BLOBS);
        first_unowned_reln = NULL;
    }

    /* Look up or create an entry, keyed by (relfilenode, backend) */
    brnode.node = rnode;
    brnode.backend = backend;
    reln = (SMgrRelation) hash_search(SMgrRelationHash,
                                      (void *) &brnode,
                                      HASH_ENTER, &found);

    /* Initialize it if not present before */
    if (!found)
    {
        int            forknum;
#ifdef _SHARDING_
        /* only the per-shard reset loop needs an index */
        int            i;
#endif

        /* hash_search already filled in the lookup key */
        reln->smgr_owner = NULL;
        reln->smgr_targblock = InvalidBlockNumber;
#ifdef _SHARDING_
        /* no per-shard insertion target is known yet */
        for (i = 0; i < SMGR_TARGBLOCK_MAX_SHARDS; i++)
            reln->smgr_shard_targblocks[i] = InvalidBlockNumber;
        reln->smgr_ema_nblocks = InvalidBlockNumber;
#endif
        reln->smgr_fsm_nblocks = InvalidBlockNumber;
        reln->smgr_vm_nblocks = InvalidBlockNumber;
        reln->smgr_which = 0;    /* we only have md.c at present */

        /* mark it not open */
        for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
            reln->md_num_open_segs[forknum] = 0;

#ifdef _MLS_
        rel_crypt_struct_init(&(reln->smgr_relcrypt));
#endif

        /* it has no owner yet */
        add_to_unowned_list(reln);
    }

#ifdef _MLS_
    /*
     * Done on every call, not just on creation, so an existing entry is
     * looked up again in the relcrypt hash each time it is opened.
     */
    rel_crypt_hash_lookup(&rnode, &(reln->smgr_relcrypt));
#endif

    return reln;
}

/*
 * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object
 *
 * There can be only one owner at a time; this is sufficient since currently
 * the only such owners exist in the relcache.
 */
void
smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
{
    SMgrRelation *prev_owner;

    /* Disowning is not done by passing NULL here; that is smgrclearowner's job */
    Assert(owner != NULL);

    /*
     * Detach the reln from wherever it currently lives.  An unowned reln
     * sits on the unowned list and has to come off it.  An owned one
     * (unusual, but thought possible during swap_relation_files()
     * depending on processing order) simply has its old owner's pointer
     * cleared; closing that relcache entry's reference early is harmless.
     */
    prev_owner = reln->smgr_owner;
    if (prev_owner == NULL)
    {
        remove_from_unowned_list(reln);
    }
    else
    {
        *prev_owner = NULL;
    }

    /* Link the reln and its new owner to each other */
    reln->smgr_owner = owner;
    *owner = reln;
}

/*
 * smgrclearowner() -- Remove long-lived reference to an SMgrRelation object
 *                       if one exists
 */
void
smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
{
    /* Act only when the caller really is the current owner */
    if (reln->smgr_owner == owner)
    {
        /* break the link in both directions */
        *owner = NULL;
        reln->smgr_owner = NULL;

        /* an ownerless reln belongs on the unowned list */
        add_to_unowned_list(reln);
    }
}

/*
 * add_to_unowned_list -- link an SMgrRelation onto the unowned list
 *
 * Check remove_from_unowned_list()'s comments for performance
 * considerations.
 */
static void
add_to_unowned_list(SMgrRelation reln)
{
    SMgrRelation old_head = first_unowned_reln;

    /* push onto the front, which keeps a following smgrsetowner cheap */
    first_unowned_reln = reln;
    reln->next_unowned_reln = old_head;
}

/*
 * remove_from_unowned_list -- unlink an SMgrRelation from the unowned list
 *
 * If the reln is not present in the list, nothing happens.  Typically this
 * would be caller error, but there seems no reason to throw an error.
 *
 * In the worst case this could be rather slow; but in all the cases that seem
 * likely to be performance-critical, the reln being sought will actually be
 * first in the list.  Furthermore, the number of unowned relns touched in any
 * one transaction shouldn't be all that high typically.  So it doesn't seem
 * worth expending the additional space and management logic needed for a
 * doubly-linked list.
 */
static void
remove_from_unowned_list(SMgrRelation reln)
{
    /* points at whichever pointer currently leads to the node under test */
    SMgrRelation *linkp = &first_unowned_reln;

    while (*linkp != NULL)
    {
        SMgrRelation node = *linkp;

        if (node == reln)
        {
            /* splice the node out and clear its forward pointer */
            *linkp = node->next_unowned_reln;
            node->next_unowned_reln = NULL;
            return;
        }

        linkp = &node->next_unowned_reln;
    }

    /* not found: silently do nothing */
}

/*
 *    smgrexists() -- Does the underlying file for a fork exist?
 */
bool
smgrexists(SMgrRelation reln, ForkNumber forknum)
{
    int            which = reln->smgr_which;

    /* hand the question to the relation's storage manager */
    return smgrsw[which].smgr_exists(reln, forknum);
}

/*
 *    smgrclose() -- Close and delete an SMgrRelation object.
 */
void
smgrclose(SMgrRelation reln)
{
    SMgrRelation *owner;
    ForkNumber    forknum;
    void       *removed;

    /* Close every fork at the storage-manager level */
    forknum = 0;
    while (forknum <= MAX_FORKNUM)
    {
        smgrsw[reln->smgr_which].smgr_close(reln, forknum);
        forknum++;
    }

    /* Remember the owner before the hash entry goes away */
    owner = reln->smgr_owner;

    /* An ownerless reln is on the unowned list; take it off */
    if (owner == NULL)
        remove_from_unowned_list(reln);

    removed = hash_search(SMgrRelationHash,
                          (void *) &(reln->smgr_rnode),
                          HASH_REMOVE, NULL);
    if (removed == NULL)
        elog(ERROR, "SMgrRelation hashtable corrupted");

    /*
     * Clear the owner's pointer last: should the removal above fail, the
     * SMgrRelation still exists and the owner's reference stays valid.
     */
    if (owner != NULL)
        *owner = NULL;
}

/*
 *    smgrcloseall() -- Close all existing SMgrRelation objects.
 */
void
smgrcloseall(void)
{
    HASH_SEQ_STATUS status;
    SMgrRelation reln;

    /* Nothing to do if hashtable not set up */
    if (SMgrRelationHash == NULL)
        return;

    hash_seq_init(&status, SMgrRelationHash);

    while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
        smgrclose(reln);
}

/*
 *    smgrclosenode() -- Close SMgrRelation object for given RelFileNode,
 *                       if one exists.
 *
 * This has the same effects as smgrclose(smgropen(rnode)), but it avoids
 * uselessly creating a hashtable entry only to drop it again when no
 * such entry exists already.
 */
void
smgrclosenode(RelFileNodeBackend rnode)
{
    SMgrRelation reln;

    /* Only bother if the table exists; a miss in it means nothing to close */
    if (SMgrRelationHash != NULL)
    {
        /* HASH_FIND never creates an entry, unlike going through smgropen */
        reln = (SMgrRelation) hash_search(SMgrRelationHash,
                                          (void *) &rnode,
                                          HASH_FIND, NULL);
        if (reln != NULL)
            smgrclose(reln);
    }
}

/*
 *    smgrcreate() -- Create a new relation.
 *
 *        Given an already-created (but presumably unused) SMgrRelation,
 *        cause the underlying disk file or other storage for the fork
 *        to be created.
 *
 *        If isRedo is true, it is okay for the underlying file to exist
 *        already because we are in a WAL replay sequence.
 */
void
smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
{
    int            which;

    /*
     * During WAL replay, a fork we already have open must already exist,
     * so there is nothing left to create.
     */
    if (isRedo)
    {
        if (reln->md_num_open_segs[forknum] > 0)
            return;
    }

    /*
     * This may be the first use of the target tablespace by this database,
     * so make sure the per-database subdirectory is there.
     *
     * XXX this is a fairly ugly violation of module layering, but no better
     * place for the check has been found.  Moving TablespaceCreateDbspace
     * here from commands/tablespace.c would drag in a lot of knowledge that
     * smgr.c ought not to have either.
     */
    TablespaceCreateDbspace(reln->smgr_rnode.node.spcNode,
                            reln->smgr_rnode.node.dbNode,
                            isRedo);

    /* Let the storage manager create the fork itself */
    which = reln->smgr_which;
    smgrsw[which].smgr_create(reln, forknum, isRedo);
}

/*
 *    smgrdounlink() -- Immediately unlink all forks of a relation.
 *
 *        All forks of the relation are removed from the store.  This should
 *        not be used during transactional operations, since it can't be undone.
 *
 *        If isRedo is true, it is okay for the underlying file(s) to be gone
 *        already.
 *
 *        This is equivalent to calling smgrdounlinkfork for each fork, but
 *        it's significantly quicker so should be preferred when possible.
 */
void
smgrdounlink(SMgrRelation reln, bool isRedo)
{
    /* take copies of the identity and manager index up front */
    RelFileNodeBackend target = reln->smgr_rnode;
    int            mgr = reln->smgr_which;
    ForkNumber    fork;

    /* Step 1: close every fork at the smgr level */
    fork = 0;
    while (fork <= MAX_FORKNUM)
    {
        smgrsw[mgr].smgr_close(reln, fork);
        fork++;
    }

    /*
     * Step 2: discard whatever buffers the relation still has.  bufmgr
     * throws them away without writing their contents out.
     */
    DropRelFileNodesAllBuffers(&target, 1);

    /*
     * Telling the stats collector to forget the relation right now would be
     * nice, but the OID is not known here (and with relfilenode swaps it is
     * not always clear which table OID should be forgotten anyway).
     */

    /*
     * Step 3: broadcast a shared-inval message so that other backends drop
     * any dangling smgr references to this rel.  This goes out before the
     * unlink starts in case that step fails partway through.  The message
     * eventually reaches this backend as well, which acts as a backstop
     * ensuring our own smgr rel was closed.
     */
    CacheInvalidateSmgr(target);

    /*
     * Step 4: remove the physical file(s); InvalidForkNumber asks for all
     * forks at once.
     *
     * Note: smgr_unlink has to report a deletion failure as a WARNING and
     * not an ERROR, since the decision to commit or abort the current xact
     * has already been made.
     */
    smgrsw[mgr].smgr_unlink(target, InvalidForkNumber, isRedo);
}

/*
 *    smgrdounlinkall() -- Immediately unlink all forks of all given relations
 *
 *        All forks of all given relations are removed from the store.  This
 *        should not be used during transactional operations, since it can't be
 *        undone.
 *
 *        If isRedo is true, it is okay for the underlying file(s) to be gone
 *        already.
 *
 *        This is equivalent to calling smgrdounlink for each relation, but it's
 *        significantly quicker so should be preferred when possible.
 */
void
smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
{
    RelFileNodeBackend *targets;
    ForkNumber    fork;
    int            idx;

    /* An empty request needs no work (and no allocation) */
    if (nrels == 0)
        return;

    /*
     * Collect the identities of all relations to drop into one array,
     * closing each relation's forks at the smgr level along the way.
     */
    targets = palloc(sizeof(RelFileNodeBackend) * nrels);
    for (idx = 0; idx < nrels; idx++)
    {
        SMgrRelation cur = rels[idx];
        RelFileNodeBackend ident = cur->smgr_rnode;
        int            mgr = cur->smgr_which;

        targets[idx] = ident;

        fork = 0;
        while (fork <= MAX_FORKNUM)
        {
            smgrsw[mgr].smgr_close(cur, fork);
            fork++;
        }
    }

    /*
     * Discard whatever buffers the relations still have.  bufmgr throws
     * them away without writing their contents out.
     */
    DropRelFileNodesAllBuffers(targets, nrels);

    /*
     * Telling the stats collector to forget these relations right now would
     * be nice, but the OIDs are not known here.
     */

    /*
     * Broadcast shared-inval messages so that other backends drop any
     * dangling smgr references to these rels.  They go out before the
     * unlinking starts in case that step fails partway through.  The
     * messages eventually reach this backend as well, which acts as a
     * backstop ensuring our own smgr rels were closed.
     */
    for (idx = 0; idx < nrels; idx++)
        CacheInvalidateSmgr(targets[idx]);

    /*
     * Remove the physical file(s), one fork at a time.
     *
     * Note: smgr_unlink has to report a deletion failure as a WARNING and
     * not an ERROR, since the decision to commit or abort the current xact
     * has already been made.
     */
    for (idx = 0; idx < nrels; idx++)
    {
        int            mgr = rels[idx]->smgr_which;

        fork = 0;
        while (fork <= MAX_FORKNUM)
        {
            smgrsw[mgr].smgr_unlink(targets[idx], fork, isRedo);
            fork++;
        }
    }

    pfree(targets);
}

/*
 *    smgrdounlinkfork() -- Immediately unlink one fork of a relation.
 *
 *        The specified fork of the relation is removed from the store.  This
 *        should not be used during transactional operations, since it can't be
 *        undone.
 *
 *        If isRedo is true, it is okay for the underlying file to be gone
 *        already.
 */
void
smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo)
{
    /* take copies of the identity and manager index up front */
    RelFileNodeBackend target = reln->smgr_rnode;
    int            mgr = reln->smgr_which;

    /* Step 1: close just this fork at the smgr level */
    smgrsw[mgr].smgr_close(reln, forknum);

    /*
     * Step 2: discard whatever buffers the fork still has, starting from
     * block 0.  bufmgr throws them away without writing their contents out.
     */
    DropRelFileNodeBuffers(target, forknum, 0);

    /*
     * Telling the stats collector to forget the relation right now would be
     * nice, but the OID is not known here (and with relfilenode swaps it is
     * not always clear which table OID should be forgotten anyway).
     */

    /*
     * Step 3: broadcast a shared-inval message so that other backends drop
     * any dangling smgr references to this rel.  This goes out before the
     * unlink starts in case that step fails partway through.  The message
     * eventually reaches this backend as well, which acts as a backstop
     * ensuring our own smgr rel was closed.
     */
    CacheInvalidateSmgr(target);

    /*
     * Step 4: remove the physical file(s) of the fork.
     *
     * Note: smgr_unlink has to report a deletion failure as a WARNING and
     * not an ERROR, since the decision to commit or abort the current xact
     * has already been made.
     */
    smgrsw[mgr].smgr_unlink(target, forknum, isRedo);
}

/*
 *    smgrextend() -- Add a new block to a file.
 *
 *        The semantics are nearly the same as smgrwrite(): write at the
 *        specified position.  However, this is to be used for the case of
 *        extending a relation (i.e., blocknum is at or beyond the current
 *        EOF).  Note that we assume writing a block beyond current EOF
 *        causes intervening file space to become filled with zeroes.
 */
void
smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
           char *buffer, bool skipFsync)
{
    int            which = reln->smgr_which;

    /* all arguments are forwarded unchanged to the storage manager */
    smgrsw[which].smgr_extend(reln, forknum, blocknum, buffer, skipFsync);
}

/*
 *    smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
 */
void
smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{
    int            which = reln->smgr_which;

    /* all arguments are forwarded unchanged to the storage manager */
    smgrsw[which].smgr_prefetch(reln, forknum, blocknum);
}

/*
 *    smgrread() -- read a particular block from a relation into the supplied
 *                  buffer.
 *
 *        This routine is called from the buffer manager in order to
 *        instantiate pages in the shared buffer cache.  All storage managers
 *        return pages in the format that POSTGRES expects.
 */
void
smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
         char *buffer)
{
    int            which = reln->smgr_which;

    /* the storage manager fills the caller's buffer */
    smgrsw[which].smgr_read(reln, forknum, blocknum, buffer);
}

/*
 *    smgrwrite() -- Write the supplied buffer out.
 *
 *        This is to be used only for updating already-existing blocks of a
 *        relation (ie, those before the current EOF).  To extend a relation,
 *        use smgrextend().
 *
 *        This is not a synchronous write -- the block is not necessarily
 *        on disk at return, only dumped out to the kernel.  However,
 *        provisions will be made to fsync the write before the next checkpoint.
 *
 *        skipFsync indicates that the caller will make other provisions to
 *        fsync the relation, so we needn't bother.  Temporary relations also
 *        do not require fsync.
 */
void
smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
          char *buffer, bool skipFsync)
{
    int            which = reln->smgr_which;

    /* all arguments are forwarded unchanged to the storage manager */
    smgrsw[which].smgr_write(reln, forknum, blocknum, buffer, skipFsync);
}


/*
 *    smgrwriteback() -- Trigger kernel writeback for the supplied range of
 *                       blocks.
 */
void
smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
              BlockNumber nblocks)
{
    int            which = reln->smgr_which;

    /* the range is nblocks blocks beginning at blocknum */
    smgrsw[which].smgr_writeback(reln, forknum, blocknum, nblocks);
}

/*
 *    smgrnblocks() -- Calculate the number of blocks in the
 *                     supplied relation.
 */
BlockNumber
smgrnblocks(SMgrRelation reln, ForkNumber forknum)
{
    int            which = reln->smgr_which;

    /* the storage manager's answer is returned as-is */
    return smgrsw[which].smgr_nblocks(reln, forknum);
}

/*
 *    smgrtruncate() -- Truncate supplied relation to the specified number
 *                      of blocks
 *
 * The truncation is done immediately, so this can't be rolled back.
 */
void
smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
{
    /*
     * First drop the buffers covering the blocks that are about to vanish.
     * bufmgr throws them away without writing their contents out.
     */
    DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nblocks);

    /*
     * Next, broadcast a shared-inval message so that other backends close
     * any smgr references they hold for this rel.  They could otherwise
     * keep open file pointers to segments that got removed, and/or
     * smgr_targblock values that point past the new end of the rel.  (Our
     * own backend receives the message too and performs a
     * probably-unnecessary local smgr flush, but this is not expected to be
     * a performance-critical path.)  As with unlinking, the message must be
     * sent before anything changes on disk.
     */
    CacheInvalidateSmgr(reln->smgr_rnode);

    /* Finally, have the storage manager perform the truncation */
    smgrsw[reln->smgr_which].smgr_truncate(reln, forknum, nblocks);
}

/*
 *    smgrimmedsync() -- Force the specified relation to stable storage.
 *
 *        Synchronously force all previous writes to the specified relation
 *        down to disk.
 *
 *        This is useful for building completely new relations (eg, new
 *        indexes).  Instead of incrementally WAL-logging the index build
 *        steps, we can just write completed index pages to disk with smgrwrite
 *        or smgrextend, and then fsync the completed index file before
 *        committing the transaction.  (This is sufficient for purposes of
 *        crash recovery, since it effectively duplicates forcing a checkpoint
 *        for the completed index.  But it is *not* sufficient if one wishes
 *        to use the WAL log for PITR or replication purposes: in that case
 *        we have to make WAL entries as well.)
 *
 *        The preceding writes should specify skipFsync = true to avoid
 *        duplicative fsyncs.
 *
 *        Note that you need to do FlushRelationBuffers() first if there is
 *        any possibility that there are dirty buffers for the relation;
 *        otherwise the sync is not very meaningful.
 */
void
smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
{
    int            which = reln->smgr_which;

    /* the storage manager does the actual sync of this fork */
    smgrsw[which].smgr_immedsync(reln, forknum);
}


/*
 *    smgrpreckpt() -- Prepare for checkpoint.
 */
void
smgrpreckpt(void)
{
    int            i;

    for (i = 0; i < NSmgr; i++)
    {
        if (smgrsw[i].smgr_pre_ckpt)
            (*(smgrsw[i].smgr_pre_ckpt)) ();
    }
}

/*
 *    smgrsync() -- Sync files to disk during checkpoint.
 */
void
smgrsync(void)
{
    int            i;

    for (i = 0; i < NSmgr; i++)
    {
        if (smgrsw[i].smgr_sync)
            (*(smgrsw[i].smgr_sync)) ();
    }
}

/*
 *    smgrpostckpt() -- Post-checkpoint cleanup.
 */
void
smgrpostckpt(void)
{
    int            i;

    for (i = 0; i < NSmgr; i++)
    {
        if (smgrsw[i].smgr_post_ckpt)
            (*(smgrsw[i].smgr_post_ckpt)) ();
    }
}

#ifdef _SHARDING_
void
smgrdealloc(SMgrRelation reln, ForkNumber forknum, BlockNumber from_blk)
{
    /* Compiles to a no-op when DISABLE_FALLOCATE is defined */
#ifndef DISABLE_FALLOCATE
    smgrsw[reln->smgr_which].smgr_dealloc(reln, forknum, from_blk);
#endif
}

void
smgrrealloc(SMgrRelation reln, ForkNumber forknum, BlockNumber from_blk)
{
    int            which = reln->smgr_which;

    /* unlike smgrdealloc, this is not gated on DISABLE_FALLOCATE */
    smgrsw[which].smgr_realloc(reln, forknum, from_blk);
}
#endif

/*
 * AtEOXact_SMgr
 *
 * This routine is called during transaction commit or abort (it doesn't
 * particularly care which).  All transient SMgrRelation objects are closed.
 *
 * We do this as a compromise between wanting transient SMgrRelations to
 * live awhile (to amortize the costs of blind writes of multiple blocks)
 * and needing them to not live forever (since we're probably holding open
 * a kernel file descriptor for the underlying file, and we need to ensure
 * that gets closed reasonably soon if the file gets deleted).
 */
void
AtEOXact_SMgr(void)
{
    /*
     * Zap all unowned SMgrRelations.  We rely on smgrclose() to remove each
     * one from the list.
     */
    while (first_unowned_reln != NULL)
    {
        Assert(first_unowned_reln->smgr_owner == NULL);
        smgrclose(first_unowned_reln);
    }
}

#ifdef _SHARDING_
/*
 * smgr_get_target_block() -- fetch the cached insertion target block.
 *
 * Returns the relation-wide smgr_targblock when shardid is not a valid shard
 * id, and the per-shard slot smgr_shard_targblocks[shardid] otherwise.
 *
 * NOTE(review): the setter below picks the relation-wide slot whenever
 * smgr_hasextent is false, even for a valid shardid, whereas this getter
 * looks only at shardid.  Presumably callers use the shard form only for
 * relations that have extents -- confirm against callers.
 */
BlockNumber
smgr_get_target_block(SMgrRelation rel, ShardID shardid)
{
    if (!ShardIDIsValid(shardid))
        return rel->smgr_targblock;

    /* assumes a valid shard id is within the array's bounds -- TODO confirm */
    return rel->smgr_shard_targblocks[shardid];
}

/*
 * smgr_set_target_block() -- remember the insertion target block.
 *
 * Stores blkno in the relation-wide smgr_targblock when the relation has no
 * extents or when shardid is not a valid shard id; otherwise stores it in
 * the per-shard slot smgr_shard_targblocks[shardid].
 *
 * The shard id check mirrors smgr_get_target_block() and keeps an invalid
 * id from being used as an array index.
 */
void
smgr_set_target_block(SMgrRelation rel, ShardID shardid, BlockNumber blkno)
{
    if (!rel->smgr_hasextent || !ShardIDIsValid(shardid))
    {
        rel->smgr_targblock = blkno;
        return;
    }

    /* assumes a valid shard id is within the array's bounds -- TODO confirm */
    rel->smgr_shard_targblocks[shardid] = blkno;
}

#endif
